import pandas
import sys
import numpy as np
import argparse
import os

def make_knowledgebase(raw_excel_path, out_txt_path):
    """Convert an Excel sheet into a plain-text knowledge-base file.

    The sheet is loaded with ``pandas.read_excel`` (default options) and
    every row is rendered as a four-line entry built from the columns
    ``名称``, ``近义词``, ``来源（保留书名号）`` and ``正文``. Entries are joined
    with a ``*****`` separator line and written to ``out_txt_path`` as
    UTF-8. Empty (NaN) cells are rendered as empty strings. Duplicate rows
    are kept as-is.

    Args:
        raw_excel_path: Path of the source Excel file.
        out_txt_path: Path of the text file to create or overwrite.

    Raises:
        KeyError: If the sheet has rows but lacks one of the required
            columns. The output file is left untouched in that case.
    """
    required_columns = ('名称', '近义词', '来源（保留书名号）', '正文')

    df = pandas.read_excel(raw_excel_path)
    df = df.replace(np.nan, '')

    if len(df) == 0:
        # A sheet without data rows yields an empty output file.
        entries = []
    else:
        missing = [col for col in required_columns if col not in df.columns]
        if missing:
            raise KeyError(
                f"missing required column(s) in {raw_excel_path!r}: {missing}"
            )
        # Iterate column-wise instead of with iterrows(): it avoids building
        # a Series per row and keeps each column's own dtype.
        entries = [
            f"名称：{name}\n近义词：{synonyms}\n来源：{source}\n正文: {body}\n"
            for name, synonyms, source, body in zip(
                *(df[col] for col in required_columns)
            )
        ]

    content = "*****\n".join(entries)

    # Open the output only after all content has been built, so a failure
    # above cannot truncate an existing file.
    with open(out_txt_path, "w", encoding="utf-8") as f:
        f.write(content)


if __name__ == "__main__":
    # Command-line entry point: read the two paths and run the conversion.
    parser = argparse.ArgumentParser(description="dify批量运行器")
    # Both paths are mandatory. Without required=True an omitted option
    # defaults to None and is passed straight into make_knowledgebase;
    # argparse now reports a usage error instead.
    parser.add_argument(
        '-rep', "--raw_excel_path",
        type=str,
        required=True,
        help="原始excel文件地址",
    )
    parser.add_argument(
        '-otp', "--out_txt_path",
        type=str,
        required=True,
        help="输出txt文件地址",
    )
    args = parser.parse_args()
    make_knowledgebase(args.raw_excel_path, args.out_txt_path)